itscausal

a package for flexible estimation of interrupted time series

Aurélien Sallin, Tobias Müller, Daniel Ammann

Simulation: Introduction

We simulate three processes (changes in the error structure):

  1. AR Process
ar_process <- function(n, phi = 0.7, sigma = 1) {
  e <- rnorm(n, 0, sigma)
  x <- numeric(n)
  x[1] <- e[1]
  for (t in 2:n) {
    x[t] <- phi * x[t - 1] + e[t]
  }
  return(x)
}
  1. ARMA Process
  2. Process with heteroskedastic errors

Simulation: Introduction

We simulate three processes (changes in the error structure):

  1. AR Process
  2. ARMA Process
arma_process <- function(n, ar = 0.7, ma = 0.3, sigma = 1) {
  e <- rnorm(n, 0, sigma)
  x <- numeric(n)
  x[1] <- e[1]
  for (t in 2:n) {
    x[t] <- ar * x[t - 1] + e[t] + ma * e[t - 1]
  }
  return(x)
}
  1. Process with heteroskedastic errors

Simulation: Introduction

We simulate three processes (changes in the error structure):

  1. AR Process
  2. ARMA Process
  3. Process with heteroskedastic errors
heteroskedastic_errors <- function(n, base_sigma = 1, increase_rate = 0.1) {
  sigma <- base_sigma + increase_rate * (1:n)
  return(rnorm(n, 0, sigma))
}

Data generation

# Generate simulated dataset
n_time <- 100 # Number of time points
intervention <- round(0.8 * n_time) # Time point of intervention
n_id <- 60 # Number of unique individuals
constant <- 1

X <- sample(c(0, 1), n_id, T)
param_timetrend <- 0.07
param_dummyintervention <- -0.15
param_slopeintervention <- -0.10

# Seasonality
season_effect <- c(
  0,
  rnorm(2, 0, 4),
  rnorm(4, -2, 0.5),
  rnorm(3, 0, 1)
)

Simulate an ITS

df <- data.frame(
  ID = rep(seq(1:n_id), each = n_time),
  X = rep(X, each = n_time),
  season = rep(
    c(rep(1:12, n_time %/% 12), (1:12)[1:(n_time %% 12)]),
    n_id
  ),
  id.effect = rep(rnorm(n_id, 0, 10), each = n_time),
  season_effect = rep(season_effect, times = n_time),
  post = rep(c(rep(0, intervention), rep(1, 0.2 * n_time)), n_id),
  time = rep((1:n_time) - intervention, n_id),
  error_simple = rnorm(n_time * n_id, 0, 2),
  error_ar = unlist(replicate(n_id, ar_process(n_time, sigma = 2), simplify = FALSE)),
  error_arma = unlist(replicate(n_id, arma_process(n_time), simplify = FALSE)),
  error_heteroskedastic = unlist(replicate(n_id, heteroskedastic_errors(n_time), simplify = FALSE))
)

df$abs_time <- df$time + abs(min(df$time))

Visualize the simulated DGP

Compute ATE

# Compute effects
df <- df[, ite := ifelse(post == 0, NA, model - forecast)]
ate5 <- mean(df[time < 6 & post == 1, ]$ite)
ate1 <- mean(df[time < 2 & post == 1, ]$ite)

print(ate5)
[1] 8.35125
print(ate1)
[1] 8.15125

We can reproduce these results using the itscausal package.

window <- 36L # define window of time

fore_y_simple <- forecastITS(
  data = df,
  time = "time",
  INDEX = 0L, # time of the intervention
  WINDOW = window,
  STEPS = 5,
  covariates_time = c("season"), # define seasonality
  covariates_fix = c("X"), # define covariates
  key = "ID", # define the unit of observation
  y = "y_simple",
  method = c("lm", "xgboost"), # define method.
  K = 5 # define the number of cross-validation folds
  # optim = TRUE # whether or not the algorithm is optimized
)

Graph

Compute the instantaneous treatment effect for the whole population

Per groupdf: arma

[1] "ARMA (1 period): mean =  7.598 ; sd =  0.248"
[1] "ARMA (5 periods): mean =  8.241 ; sd =  0.624"
[1] 8.15125
[1] 8.35125